# -*- coding: utf-8 -*-
import scrapy
from scrapy.http.request import Request

from autoDangdang.items import AutodangdangItem

class DangdangSpider(scrapy.Spider):
    """Spider that scrapes book listings from dangdang.com category pages.

    Starting from page 1 of the category ``cp01.04.00.00.00.00``, it
    extracts product names, prices, links and comment counts, then queues
    requests for the remaining paginated listing pages.
    """
    name = 'dangdang'
    allowed_domains = ['dangdang.com']

    # Start crawling from the first page of the category listing.
    start_urls = ['http://category.dangdang.com/pg1-cp01.04.00.00.00.00.html']

    # Last page number to crawl (inclusive). NOTE(review): the original
    # comment claimed pages up to 75 but the code only looped range(2, 5);
    # the bound is kept at 4 to preserve behavior, and is now a class
    # attribute so it can be raised (e.g. to 75) without editing parse().
    max_page = 4

    def parse(self, response):
        """Extract product fields from one listing page and follow pagination.

        :param response: the downloaded listing page.
        :yields: one ``AutodangdangItem`` per page (each field holds the
            *list* of values for every product on the page — original
            behavior, preserved), followed by ``Request`` objects for
            pages 2..max_page (Scrapy's dupefilter drops repeats).
        """
        item = AutodangdangItem()
        item['name'] = response.xpath("//a[@class='pic']/@title").extract()
        item['price'] = response.xpath("//span[@class='search_now_price']/text()").extract()
        item['link'] = response.xpath("//a[@class='pic']/@href").extract()
        item['comnum'] = response.xpath("//a[@class='search_comment_num']/text()").extract()

        # Yield the whole page's scraped data as a single item.
        yield item

        # Queue the remaining listing pages, reusing this method as the
        # callback for each request.
        for page in range(2, self.max_page + 1):
            url = "http://category.dangdang.com/pg" + str(page) + "-cp01.04.00.00.00.00.html"
            yield Request(url, callback=self.parse)
        